* Session setup: start from an empty session, turn off output paging,
* and raise the matrix-size and variable-count limits before any data are loaded.
clear all
set more off
set matsize 10000
set maxvar 10000
* Add the project's tool directory to the ado search path (must come before -preliminaries-)
adopath + ../code/gslab_tools/

* NOTE(review): -preliminaries- is not defined in this file; presumably it ships with
* gslab_tools. Confirm what the doutf() option does with the derived-data directory.
preliminaries, doutf(../derived/Antipsychotics)
* Font used for graphs drawn in the Graph window
graph set window fontface "Times New Roman"

/**********
* Import Study Characteristics *
***********/

* Read the study-level spreadsheet (cells A4:AJ549 of Sheet1).
* The columns arrive named by their spreadsheet letters, A through AJ.
import excel using "../raw/Antipsychotics/Antipsychotic_Drug_Data.xlsx", ///
	sheet("Sheet1") cellrange(A4:AJ549) firstrow clear

* Variable Names & Labels
* Study descriptors (columns A-I)
rename (A B C D E F G H I) ///
	(studyname year dosage n number_female mean_age lengthoftrial ///
	mean_illness_duration_in_years diagnosis)

* Baseline scores (columns J-Q)
rename (J K L M N O P Q) ///
	(efficacy_baseline_PANSS_mean efficacy_baseline_PANSS_SD ///
	efficacy_baseline_PANSS_pos_mean efficacy_baseline_PANSS_neg_mean ///
	efficacy_baseline_BPRS_mean efficacy_baseline_BPRS_SD ///
	efficacy_baseline_CGI_mean efficacy_baseline_CGI_SD)

* Endpoint scores (columns R-Y)
rename (R S T U V W X Y) ///
	(efficacy_endpoint_PANSS_mean efficacy_endpoint_PANSS_SD ///
	efficacy_endpoint_PANSS_pos_mean efficacy_endpoint_PANSS_neg_mean ///
	efficacy_endpoint_BPRS_mean efficacy_endpoint_BPRS_SD ///
	efficacy_endpoint_CGI_mean efficacy_endpoint_CGI_SD)

* Change scores (columns Z-AE)
rename (Z AA AB AC AD AE) ///
	(efficacy_change_PANSS_mean efficacy_change_PANSS_SD ///
	efficacy_change_BPRS_mean efficacy_change_BPRS_SD ///
	efficacy_change_CGI_mean efficacy_change_CGI_SD)

* Sponsorship fields and dropouts (columns AF-AJ)
rename (AF AG AH AI AJ) ///
	(sponsor_str sponsor_remove_coi_str sponsor_descrip sponsor_category dropouts)

* Turn the two Yes/No/Unclear sponsorship strings into indicators:
* Yes or Unclear -> 1, No -> 0, anything else stays missing.
foreach flag in sponsor sponsor_remove_coi {
	gen `flag' = 1 if inlist(`flag'_str, "Yes", "Unclear")
	replace `flag' = 0 if `flag'_str == "No"
}
* The free-text sponsor description and category are not used further
drop sponsor_descrip sponsor_category

* Drug & Dosage
* Housekeeping: display width for diagnosis, numeric sample size,
* share of women per arm, and a numeric identifier for each study name.
format diagnosis %40s
destring n, replace force
gen share_female = number_female / n
egen study_no = group(studyname)

* Position expressions reused below. They are expanded as text at each use,
* so they are always evaluated against the current contents of dosage.
local blank  `"strpos(dosage, " ")"'
local lparen `"strpos(dosage, "(")"'
local rparen `"strpos(dosage, ")")"'
local dash   `"strpos(dosage, "-")"'

* The drug code is the text before the first blank; a cell with no blank is all code
gen drug = substr(dosage, 1, `blank' - 1)
replace drug = dosage if drug == ""
order year study_no drug

* Keep only what follows the drug code; a cell that held just the code is emptied
replace dosage = substr(dosage, `blank' + 1, .)
replace dosage = "" if dosage == drug

* Average dose: the leading token, or the whole string when there is no parenthesis
gen dose_avg = substr(dosage, 1, `blank' - 1) if `blank' > 0
replace dose_avg = dosage if `lparen' <= 0

* Dose range: text between the opening parenthesis and the dash,
* and between the dash and the closing parenthesis
gen dose_min = substr(dosage, `lparen' + 1, (`dash' - 1) - `lparen') if `lparen' > 0
gen dose_max = substr(dosage, `dash' + 1, (`rparen' - 1) - `dash') if `rparen' > 0

* Convert to numbers (non-numeric text becomes missing); a missing bound defaults to the average
destring dose_avg dose_min dose_max, replace force
foreach bound in dose_min dose_max {
	replace `bound' = dose_avg if `bound' == .
}
order year study_no drug dosage dose_avg dose_min dose_max

* Force the study descriptors and every efficacy column (baseline, endpoint, change)
* to numeric storage; cells that cannot be parsed as numbers become missing.
destring year n number_female mean_age lengthoftrial ///
	mean_illness_duration_in_years dropouts, replace force
destring efficacy_baseline_* efficacy_endpoint_* efficacy_change_*, replace force

* Expand the spreadsheet's drug abbreviations to full generic names.
* Codes not listed here are left unchanged.
replace drug = "amisulpride" if drug=="AMI"
replace drug = "aripiprazole" if drug=="ARI"
replace drug = "asenapine" if drug=="ASE"
replace drug = "clozapine" if drug=="CLO"
replace drug = "chlorpromazine" if drug=="CPZ" | drug=="CPZ(-200)"
replace drug = "haloperidol" if drug=="HAL"
replace drug = "iloperidone" if drug=="ILO"
replace drug = "lurasidone" if drug=="LURA" | drug=="LURA120" | drug=="LUR"
replace drug = "olanzapine" if drug=="OLA"
replace drug = "paliperidone" if drug=="PAL"
replace drug = "placebo" if drug=="PBO"
replace drug = "quetiapine" if drug=="QUE"
replace drug = "risperidone" if drug=="RIS"
replace drug = "sertindole" if drug=="SER"
replace drug = "ziprasidone" if drug=="ZIP"
replace drug = "zotepine" if drug=="ZOT"
replace drug = "perphenazine" if drug =="PER"
*Keep in extra drug arms
* addl_drug = 1 for arms outside the main drug list: labels starting with a bracket,
* FGA, perphenazine, Blonanserin and Sonepiprazole.
* PER has already been recoded to perphenazine just above, so testing only the
* abbreviation could never match; the full name is tested as well.
gen addl_drug = strpos(drug,"[")==1 | drug=="FGA" | drug=="PER" | drug=="perphenazine" ///
	| drug=="Blonanserin" | drug=="Sonepiprazole"


* Approval and Patent Year
* Approval year by drug. Amisulpride, sertindole and zotepine are not approved in the US,
* and placebo has no approval year, so these (and any unlisted drug) stay missing.
gen approve_year = .
local fda "aripiprazole 2002 asenapine 2009 clozapine 1989 chlorpromazine 1957"
local fda "`fda' haloperidol 1967 iloperidone 2009 lurasidone 2010 olanzapine 1996"
local fda "`fda' paliperidone 2006 quetiapine 1997 risperidone 1993 ziprasidone 2001"
while "`fda'" != "" {
	* Peel off one drug name and its year per pass
	gettoken molecule fda : fda
	gettoken yr fda : fda
	replace approve_year = `yr' if drug == "`molecule'"
}

* Patent year by drug; placebo, sertindole and zotepine (and unlisted drugs) stay missing
gen patent_year = .
local pat "amisulpride 2008 aripiprazole 2014 asenapine 2020 clozapine 1998"
local pat "`pat' chlorpromazine 1970 haloperidol 1986 iloperidone 2016 lurasidone 2019"
local pat "`pat' olanzapine 2011 paliperidone 2019 quetiapine 2017 risperidone 2003"
local pat "`pat' ziprasidone 2012"
while "`pat'" != "" {
	gettoken molecule pat : pat
	gettoken yr pat : pat
	replace patent_year = `yr' if drug == "`molecule'"
}

* Drug generation. The list is split across two inlist() calls because inlist()
* accepts only a limited number of string arguments.
gen drug_type = "2nd Gen" if ///
	inlist(drug, "amisulpride", "aripiprazole", "asenapine", "clozapine", ///
		"iloperidone", "lurasidone", "olanzapine") ///
	| inlist(drug, "paliperidone", "quetiapine", "risperidone", "sertindole", ///
		"ziprasidone", "zotepine")
replace drug_type = "1st Gen" if inlist(drug, "chlorpromazine", "haloperidol")
replace drug_type = "Placebo" if drug == "placebo"

* Firm associated with each drug; for aripiprazole the firm depends on the study year
gen firm = "Sanofi-Aventis" if drug == "amisulpride"
replace firm = "Bristol-Myers Squibb" if drug == "aripiprazole" & year<=2010
replace firm = "Lundbeck/Forest" if (drug == "aripiprazole" & year>=2011) | drug == "sertindole"
replace firm = "Janssen" if inlist(drug, "haloperidol", "paliperidone", "risperidone")
replace firm = "Organon" if drug == "asenapine"
replace firm = "Wander,Sandoz" if drug == "clozapine"
replace firm = "GSK" if drug == "chlorpromazine"
replace firm = "Vanda" if drug == "iloperidone"
replace firm = "Danippon, Sunovion" if drug == "lurasidone"
replace firm = "Eli Lilly" if drug == "olanzapine"
replace firm = "Placebo" if drug == "placebo"
replace firm = "AstraZenca" if drug == "quetiapine"
replace firm = "Pfizer" if drug == "ziprasidone"
replace firm = "Fujiswa" if drug == "zotepine"

* Efficacy relative to a within-study reference arm, computed per rating scale.
* placebo_control is 1 for every arm of a study that contains a placebo arm.
gen placebo_control_temp = drug=="placebo"
bys study_no: egen placebo_control = max(placebo_control_temp)
foreach var in PANSS BPRS CGI {
	* Flip the sign of the reported change score
	* NOTE(review): presumably so that a larger value means more improvement; confirm
	replace efficacy_change_`var'_mean = -1* efficacy_change_`var'_mean
	* Reference value: the placebo arm's change. In studies without a placebo arm,
	* the arm sorted first (smallest sign-flipped change; missing values sort last) is used.
	gen efficacy_`var'_temp = efficacy_change_`var'_mean if drug=="placebo"
	bys study_no (efficacy_change_`var'_mean): replace efficacy_`var'_temp = efficacy_change_`var'_mean ///
		if _n==1 & placebo_control==0
	* Spread the reference to all arms of the study and take the difference
	bys study_no: egen placebo_effect_`var'=min(efficacy_`var'_temp)
	gen efficacy_`var'_relative = efficacy_change_`var'_mean - placebo_effect_`var'
	
	* Same construction for the change expressed as a fraction of the baseline score
	gen efficacy_perc_`var'_mean = efficacy_change_`var'_mean / efficacy_baseline_`var'_mean
	gen efficacy_perc_`var'_temp = efficacy_perc_`var'_mean if drug=="placebo"
	bys study_no (efficacy_perc_`var'_mean): replace efficacy_perc_`var'_temp = efficacy_perc_`var'_mean ///
		if _n==1 & placebo_control==0
	bys study_no: egen placebo_effect_perc_`var'=min(efficacy_perc_`var'_temp)
	gen efficacy_perc_`var'_relative = efficacy_perc_`var'_mean - placebo_effect_perc_`var'
	
	
	* Standardize the change score with the mean and sd taken over all arms in the data
	summ efficacy_change_`var'_mean
	gen stdz_efficacy_change_`var' = (efficacy_change_`var'_mean - `r(mean)') / `r(sd)'
	
	* Same reference-arm construction on the standardized change
	gen stdz_efficacy_`var'_temp = stdz_efficacy_change_`var' if drug=="placebo"
	bys study_no (stdz_efficacy_change_`var'): replace stdz_efficacy_`var'_temp = stdz_efficacy_change_`var' ///
		if _n==1 & placebo_control==0
	bys study_no: egen stdz_placebo_effect_`var'=min(stdz_efficacy_`var'_temp)
	gen stdz_efficacy_`var'_relative = stdz_efficacy_change_`var' - stdz_placebo_effect_`var'
}

* Pooled outcome measures: start from the PANSS version of each measure and,
* where it is missing, fall back to BPRS and then to CGI.
gen outcome_all = efficacy_change_PANSS_mean
gen stdz_outcome_all = stdz_efficacy_change_PANSS
gen stdz_outcome_relative_all = stdz_efficacy_PANSS_relative
gen efficacy_perc = efficacy_perc_PANSS_mean
gen efficacy_perc_relative = efficacy_perc_PANSS_relative
foreach scale in BPRS CGI {
	replace outcome_all = efficacy_change_`scale'_mean if outcome_all == .
	replace stdz_outcome_all = stdz_efficacy_change_`scale' if stdz_outcome_all == .
	replace stdz_outcome_relative_all = stdz_efficacy_`scale'_relative if stdz_outcome_relative_all == .
	replace efficacy_perc = efficacy_perc_`scale'_mean if efficacy_perc == .
	replace efficacy_perc_relative = efficacy_perc_`scale'_relative if efficacy_perc_relative == .
}

* Flag the arm sorted last within each study on the pooled standardized relative outcome
sort study_no stdz_outcome_relative_all
by study_no: gen best_outcome = (_n == _N)

* Baseline severity with the same PANSS, then BPRS, then CGI fallback
gen meanbaselineseverity = efficacy_baseline_PANSS_mean
foreach scale in BPRS CGI {
	replace meanbaselineseverity = efficacy_baseline_`scale'_mean if meanbaselineseverity == .
}

* Remove working variables and the raw baseline and endpoint columns
drop *temp *placebo* *_baseline_* *endpoint*
rename n no_randomised
* Keep a row when it has a sponsor flag or a change score on at least one scale
keep if sponsor != . | efficacy_change_PANSS_mean != . | efficacy_change_BPRS_mean != . ///
	| efficacy_change_CGI_mean != .

*Order drugs correctly
* Number the arms within each study: non-placebo arms first, by decreasing pooled
* outcome, with any placebo arm numbered last.
gen byte placebo_last = drug == "placebo"
gen descending_outcome = 1 - outcome_all
sort study_no placebo_last descending_outcome
by study_no: gen num_drug = _n
drop placebo_last descending_outcome
save ../derived/Antipsychotics/antipsychotic.dta, replace


*Generate significance score for response
* Build one row per study holding every arm's drug, effect, sd and sample size.
use ../derived/Antipsychotics/antipsychotic.dta, clear
keep study_no drug num_drug no_randomised efficacy_change_*

* Effect and its sd: PANSS where reported, otherwise BPRS, otherwise CGI.
* The two fallbacks are applied independently of each other.
rename (efficacy_change_PANSS_mean efficacy_change_PANSS_SD) (effect sd)
foreach scale in BPRS CGI {
	replace effect = efficacy_change_`scale'_mean if effect == .
	replace sd = efficacy_change_`scale'_SD if sd == .
}
drop efficacy_change_*

reshape wide drug effect sd no_randomised, i(study_no) j(num_drug)

*Generate z-scores for each arm's effect relative to the study's last arm
* For arm i and last arm K of a study:
*   z = (effect_i - effect_K) / sqrt(sd_i^2 / n_i + sd_K^2 / n_K)
* Arms were numbered before the reshape, so the last arm is the highest-numbered
* arm with a non-empty drug name. The data are expected to have arms 1 through 7.
*
* This loop replaces a hand-written list of comparisons that had two index slips:
* six-arm studies never received z_outcome3, and seven-arm studies received
* arm 3's score in z_outcome4. Both are corrected here.

* Two-arm comparison: each arm is scored against the other. These values also
* remain in place whenever the comparison with a later last arm is missing.
gen z_outcome1 = (effect1 - effect2) / sqrt((sd1^2 / no_randomised1) + (sd2^2 / no_randomised2))
gen z_outcome2 = (effect2 - effect1) / sqrt((sd1^2 / no_randomised1) + (sd2^2 / no_randomised2))

forvalues last = 3/7 {
	* Condition: the study has arm number last and no arm after it
	local exact "!missing(drug`last')"
	if `last' < 7 {
		local next = `last' + 1
		local exact "`exact' & missing(drug`next')"
	}

	* Score every earlier arm against the last arm; overwrite only when computable
	local prev = `last' - 1
	forvalues arm = 1/`prev' {
		tempvar z
		gen `z' = (effect`arm' - effect`last') ///
			/ sqrt((sd`arm'^2 / no_randomised`arm') + (sd`last'^2 / no_randomised`last')) ///
			if `exact'
		replace z_outcome`arm' = `z' if `z' != .
		drop `z'
	}

	* The last arm is the reference, so its own score is 0 when its effect is reported
	gen z_outcome`last' = 0 if effect`last' != . & `exact'
}
* A study is placebo-controlled when any of its arms is placebo
gen placebo_control = 0
forvalues arm = 1/7 {
	replace placebo_control = 1 if drug`arm' == "placebo"
}

* Significance indicators per arm; missing when the arm has no z-score.
* Cutoffs: 1.96 (5%) and 1.64 (10%) without a placebo arm,
*          1.64 (5%) and 1.282 (10%) with a placebo arm.
forvalues arm = 1/7 {
	gen sig5_outcome`arm' = z_outcome`arm' >= cond(placebo_control, 1.64, 1.96) ///
		if !missing(z_outcome`arm')
	gen sig10_outcome`arm' = z_outcome`arm' >= cond(placebo_control, 1.282, 1.64) ///
		if !missing(z_outcome`arm')
}

* Back to one row per study arm, discarding arm slots the study does not have
keep study_no drug* z_* sig*
reshape long z_outcome sig5_outcome sig10_outcome drug, ///
	i(study_no) j(num_drug)
drop if drug==""
save ../derived/Antipsychotics/significance.dta, replace


* Attach the z-scores and significance flags to the arm-level file.
* Every arm must match on both sides, otherwise the merge stops with an error.
use ../derived/Antipsychotics/antipsychotic.dta, clear
merge 1:1 study_no num_drug using ../derived/Antipsychotics/significance.dta, ///
	keepusing(z_* sig*) assert(3) keep(3) nogen
gen sample = "schiz"

* Name of the first scale, in the order PANSS, BPRS, CGI, with a reported change score
gen str5 scale = ""
foreach instrument in PANSS BPRS CGI {
	replace scale = "`instrument'" if scale == "" & efficacy_change_`instrument'_mean != .
}
rename dropouts dropouts_total
save ../derived/Antipsychotics/antipsychotic.dta, replace


*Create list of unique drug combos
* One row per study listing its distinct drugs, plus a running id for each distinct combination.
use ../derived/Antipsychotics/antipsychotic.dta, clear
keep study_no drug
duplicates drop

* Temporarily rename placebo with a leading z so it sorts toward the end of each study's list
replace drug = "zplacebo" if drug == "placebo"
sort study_no drug
by study_no: gen n = _n
replace drug = "placebo" if drug == "zplacebo"
reshape wide drug, i(study_no) j(n)

* Mark the first study of each distinct combination and count the marks cumulatively
sort drug*
by drug*: gen first_of_combo = _n == 1
gen drug_combo_no = sum(first_of_combo)
drop first_of_combo
save ../derived/Antipsychotics/antipsychotic_drug_combos.dta, replace

* The intermediate significance file is no longer needed; ignore the error if it is absent
capture erase ../derived/Antipsychotics/significance.dta
